# Hyperparameters and pretrained-model file locations. Their consumers are not
# visible in this part of the file, so the notes below are hedged.
maxlen = 128  # presumably the maximum token sequence length — confirm at the point of use
# NOTE(review): "batchz_size" looks like a typo of "batch_size"; the name is kept
# unchanged because code elsewhere may reference it.
batchz_size = 32  # presumably the batch size — confirm at the point of use
# Files inside the chinese_roberta_wwm_ext_L-12_H-768_A-12 directory:
# model config JSON, vocabulary file and checkpoint prefix.
config_path = "/bert/chinese_roberta_wwm_ext_L-12_H-768_A-12/bert_config.json"
dict_path = "/bert/chinese_roberta_wwm_ext_L-12_H-768_A-12/vocab.txt"
checkpoint_path = "/bert/chinese_roberta_wwm_ext_L-12_H-768_A-12/bert_model.ckpt"


def load_data(filename):
    """Load ``(text, label)`` examples from a comma-separated text file.

    Every non-blank line is stripped of surrounding whitespace and
    zero-width spaces (U+200B) and then split on ``","``. The second field
    is parsed as the integer label and the third field is used as the text.

    Blank lines (including lines consisting only of whitespace and/or
    zero-width spaces) are skipped instead of raising ``IndexError``.

    Args:
        filename: Path of the UTF-8 encoded file to read.

    Returns:
        A list of ``(text, label)`` tuples in file order; ``label`` is an
        ``int``.

    Raises:
        IndexError: If a non-blank line has fewer than three fields.
        ValueError: If the label field cannot be parsed as an integer.
    """
    examples = []
    with open(filename, encoding="utf-8") as f:
        for line in f:
            # Whitespace and zero-width spaces can be interleaved at the
            # edges of a line, hence the alternating strip calls.
            line = line.strip().strip("\u200b").strip().strip("\u200b")
            if not line:
                # e.g. a trailing empty line at the end of the file
                continue
            fields = line.split(",")
            # NOTE(review): only the third field is kept, so a text that
            # itself contains "," is cut at that comma — confirm the data
            # never has commas inside the text column.
            examples.append((fields[2], int(fields[1])))
    return examples


# Load the datasets.
train_data = load_data("datasets/sentiment/train.txt")
# NOTE(review): valid_data and test_data are both read from test.txt, so they
# contain the same examples — confirm this is intended.
valid_data = load_data("datasets/sentiment/test.txt")
test_data = load_data("datasets/sentiment/test.txt")
# Bare expression: its value is discarded when run as a script (likely a
# leftover notebook preview of the first five test examples).
test_data[:5]


# Simulate labeled and unlabeled data by splitting the training set.
train_frac = 0.01  # fraction of the training examples that stay labeled
# Original author's note: num_labeled is 100 for their dataset.
num_labeled = int(train_frac * len(train_data))
# Everything after the first num_labeled examples loses its real label and is
# tagged with the placeholder label 2.
unlabeled_data = [
    (text, 2)
    for text, _label in train_data[num_labeled:]
]
# Only the leading num_labeled examples remain as labeled training data.
train_data = train_data[:num_labeled]
